// Copyright 2017 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License.  You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
/////////////////////////////////////////////////////////
// includes
/////////////////////////////////////////////////////////
#include "mlFir.h"
#include "bar.h"
#include "utils.h"
#include "bench.h"
#include "math_fns.h"

/////////////////////////////////////////////////////////
// shared globals
/////////////////////////////////////////////////////////

    // Input test images: 900 floats = 4 data sets of 225 samples each.
    // main() hands the 225-element slice starting at fv0[225 * coreid] to
    // mlFir(), which indexes it as a 15x15 array (element [i + 15 * j]).
    // The attribute places the table in the linker section ".heapsram".
    __attribute__ ((section(".heapsram"))) static const float fv0[900] = 
    { 0.209497854F, 0.12901178F, 0.188277856F,
    0.19409056F, 0.180111453F, 0.214800045F, 0.239947945F, 0.116619118F,
    0.124313384F, 0.165686682F, 0.218034521F, 0.208809152F, 0.163281038F,
    0.206790447F, 0.275887F, 0.144741923F, 0.152077064F, 0.116280213F,
    0.207667291F, 0.23479104F, 0.18274419F, 0.259313256F, 0.192495018F,
    0.207404524F, 0.234318316F, 0.203819051F, 0.184034914F, 0.236597389F,
    0.130654573F, 0.152429909F, 0.211943567F, 0.173095867F, 0.177939489F,
    0.251434505F, 0.17677702F, 0.219314426F, 0.17980136F, 0.154145911F,
    0.192868263F, 0.179776907F, 0.070464164F, 0.159477666F, 0.12119633F,
    0.181445867F, 0.138484508F, 0.209959164F, 0.261865318F, 0.287212819F,
    0.233876914F, 0.0962036699F, 0.238342091F, 0.221246481F, 0.179762393F,
    0.295203298F, 0.151480526F, 0.266808957F, 0.125198171F, 0.152625531F,
    0.101749763F, 0.190453231F, 0.201289937F, 0.13902238F, 0.0983869359F,
    0.0297520906F, 0.121086657F, 0.0999563113F, 0.206095085F, 0.222656459F,
    0.0680825263F, 0.0767844766F, 0.139983401F, 0.213100195F, 0.231092811F,
    0.251799643F, 0.172584638F, 0.13281554F, 0.116651192F, 0.197426796F,
    0.217799559F, 0.104767144F, 0.178669184F, 0.159030661F, 0.211663231F,
    0.235883743F, 0.239208937F, 0.202168345F, 0.156748608F, 0.159007698F,
    0.124715649F, 0.239664987F, 0.147452056F, 0.0887716115F, 0.131395698F,
    0.244953513F, 0.187444508F, 0.159275874F, 0.192965761F, 0.202196345F,
    0.202909604F, 0.175282076F, 0.296816111F, 0.237056091F, 0.0518260673F,
    0.111845352F, 0.164031908F, 0.226693705F, 0.182748169F, 0.194802061F,
    0.177158475F, 0.119259484F, 0.280930787F, 0.181112856F, 0.33476615F,
    0.192185357F, 0.230919123F, 0.182726353F, 0.216322482F, 0.321136117F,
    0.324020654F, 0.239089757F, 0.208912835F, 0.17634663F, 0.233117744F,
    0.153590128F, 0.291818738F, 0.181742758F, 0.315284252F, 0.262567163F,
    0.309988648F, 0.288747817F, 0.421759129F, 0.367705524F, 0.421539813F,
    0.334379792F, 0.337772429F, 0.213456497F, 0.198005438F, 0.231043339F,
    0.302344203F, 0.271228552F, 0.305484593F, 0.418890029F, 0.401012868F,
    0.388260663F, 0.327592582F, 0.312143087F, 0.401227832F, 0.477048516F,
    0.384313077F, 0.454261601F, 0.263260126F, 0.24542743F, 0.317756265F,
    0.363533407F, 0.304926F, 0.379173547F, 0.404168785F, 0.311373413F,
    0.342224538F, 0.419896543F, 0.301319867F, 0.356041342F, 0.388544112F,
    0.485958576F, 0.388377756F, 0.220341727F, 0.126361087F, 0.278043211F,
    0.41792509F, 0.468381494F, 0.399569631F, 0.406005323F, 0.35081172F,
    0.397410363F, 0.297637641F, 0.336367935F, 0.347605228F, 0.432353377F,
    0.338791072F, 0.414639771F, 0.0997332335F, 0.246806785F, 0.419307977F,
    0.394532561F, 0.436308116F, 0.492300838F, 0.494498223F, 0.391583562F,
    0.455112934F, 0.342380583F, 0.284459F, 0.328325242F, 0.269695342F,
    0.353641599F, 0.380009681F, 0.259667814F, 0.305874676F, 0.502671778F,
    0.477507859F, 0.459587365F, 0.424785495F, 0.368239343F, 0.398899466F,
    0.361653298F, 0.28786F, 0.354934335F, 0.313629031F, 0.361044109F,
    0.369395345F, 0.442209899F, 0.152530149F, 0.329889327F, 0.461734712F,
    0.457406968F, 0.427846968F, 0.458660156F, 0.45266965F, 0.269329578F,
    0.385874957F, 0.38287577F, 0.375646293F, 0.303135067F, 0.349730492F,
    0.356691658F, 0.38826853F, 0.25218147F, 0.187289059F, 0.187974378F,
    0.340425819F, 0.407342941F, 0.406850785F, 0.494960785F, 0.3619124F,
    0.347180903F, 0.437645018F, 0.355275542F, 0.372657806F, 0.441161692F,
    0.360937029F, 0.391683877F, 0.23154825F, 0.148095503F, 0.33384338F,
    0.343426943F, 0.467629701F, 0.534961F, 0.384141743F, 0.364334971F,
    0.451752186F, 0.285854787F, 0.304973364F, 0.32834506F, 0.391059071F,
    0.339811623F, 0.355563F, 0.201275617F, 0.231008068F, 0.296292067F,
    0.36171174F, 0.359959066F, 0.431860447F, 0.434365392F, 0.42483452F,
    0.327722222F, 0.249766231F, 0.357767344F, 0.32930702F, 0.355938047F,
    0.374649584F, 0.395492643F, 0.285834163F, 0.287054449F, 0.426004112F,
    0.346686274F, 0.367538452F, 0.396886975F, 0.420156F, 0.408508807F,
    0.505949497F, 0.277856F, 0.242780149F, 0.307819545F, 0.304472983F,
    0.343485862F, 0.455584794F, 0.161123127F, 0.335526109F, 0.438497424F,
    0.383641779F, 0.512158453F, 0.433912843F, 0.365702242F, 0.37057215F,
    0.399841964F, 0.431066841F, 0.363253117F, 0.235834926F, 0.374537766F,
    0.357072175F, 0.467763901F, 0.206259504F, 0.335871249F, 0.410441905F,
    0.464552969F, 0.537217081F, 0.391034901F, 0.382038832F, 0.456394881F,
    0.371146F, 0.385765F, 0.438207209F, 0.289189667F, 0.282524824F, 0.337696403F,
    0.322538286F, 0.232360989F, 0.356232047F, 0.394157857F, 0.531566441F,
    0.58333981F, 0.47053659F, 0.415593594F, 0.386375546F, 0.336258471F,
    0.31518054F, 0.250816F, 0.269497752F, 0.332605124F, 0.278926462F,
    0.372442842F, 0.194971263F, 0.400913775F, 0.433784872F, 0.447988898F,
    0.474025488F, 0.415850163F, 0.269741714F, 0.34899959F, 0.413218111F,
    0.405414283F, 0.349803448F, 0.446733892F, 0.411548F, 0.34294045F,
    0.343860149F, 0.29034853F, 0.492005557F, 0.529488921F, 0.411879808F,
    0.509283066F, 0.349117756F, 0.402537644F, 0.380561322F, 0.400981545F,
    0.355227113F, 0.48666206F, 0.410379618F, 0.375623554F, 0.357817411F,
    0.293308973F, 0.201697454F, 0.486214787F, 0.494847447F, 0.481456548F,
    0.475705236F, 0.476083159F, 0.416302979F, 0.414527416F, 0.474740773F,
    0.409471929F, 0.499921232F, 0.343330473F, 0.404460877F, 0.407102853F,
    0.240235448F, 0.336090028F, 0.445464462F, 0.438225567F, 0.472871333F,
    0.469091952F, 0.476072311F, 0.442563236F, 0.603123307F, 0.682523191F,
    0.570366919F, 0.499452859F, 0.429841876F, 0.338204F, 0.221404612F,
    0.257847607F, 0.401898056F, 0.509396374F, 0.53402549F, 0.562449753F,
    0.54955554F, 0.430245966F, 0.488519609F, 0.728734553F, 0.519462526F,
    0.467793643F, 0.495452881F, 0.355629414F, 0.196323872F, 0.116255417F,
    0.268655717F, 0.35806945F, 0.482171983F, 0.495220274F, 0.506285727F,
    0.503128827F, 0.502278149F, 0.410593599F, 0.647030473F, 0.531428874F,
    0.482206494F, 0.406353891F, 0.326621294F, 0.326834947F, 0.306349516F,
    0.276103705F, 0.331284404F, 0.531914175F, 0.577173889F, 0.507221222F,
    0.472933292F, 0.488088131F, 0.557616115F, 0.709984958F, 0.506908894F,
    0.384493351F, 0.369198114F, 0.244795442F, 0.366789162F, 0.344206601F,
    0.255290091F, 0.184208751F, 0.367667526F, 0.429845691F, 0.430090964F,
    0.521205F, 0.512721F, 0.572611153F, 0.674201846F, 0.500270069F, 0.440694243F,
    0.297475278F, 0.262167752F, 0.298381686F, 0.297429651F, 0.319640756F,
    0.251831979F, 0.470034808F, 0.329652339F, 0.504219055F, 0.451765954F,
    0.480156243F, 0.478310525F, 0.715205848F, 0.507581711F, 0.474916548F,
    0.468852788F, 0.434615701F, 0.27907595F, 0.175879836F, 0.257878333F,
    0.376271635F, 0.551977873F, 0.440768927F, 0.505932271F, 0.56038785F,
    0.426908493F, 0.451953351F, 0.691707611F, 0.454180717F, 0.515308738F,
    0.370916635F, 0.284538031F, 0.278312773F, 0.249812901F, 0.244669482F,
    0.408567578F, 0.539751947F, 0.53629458F, 0.494028062F, 0.56735146F,
    0.475931942F, 0.595861495F, 0.657725513F, 0.493509114F, 0.437028229F,
    0.357750118F, 0.199356019F, 0.178871572F, 0.370568395F, 0.320789099F,
    0.258433104F, 0.522635F, 0.472839F, 0.578809202F, 0.531132042F, 0.514091134F,
    0.641936362F, 0.656068683F, 0.536826491F, 0.49929285F, 0.317667663F,
    0.23547104F, 0.211455852F, 0.28324616F, 0.354677796F, 0.235272422F,
    0.346255898F, 0.510488F, 0.480947495F, 0.577801347F, 0.639569938F,
    0.577529967F, 0.608919442F, 0.547525346F, 0.482127607F, 0.329298496F,
    0.313609213F, 0.320808619F, 0.258857608F, 0.282976687F, 0.162272573F,
    0.402920067F, 0.49401477F, 0.553873897F, 0.47910887F, 0.471221447F,
    0.598739326F, 0.501380503F, 0.525209665F, 0.513205171F, 0.342641205F,
    0.37182793F, 0.411500305F, 0.323587894F, 0.423137397F, 0.307165176F,
    0.553605556F, 0.533760965F, 0.700083435F, 0.512025714F, 0.4272587F,
    0.602984548F, 0.519521F, 0.550559103F, 0.394021153F, 0.457646787F,
    0.575573564F, 0.302724302F, 0.266060859F, 0.383191973F, 0.2656703F,
    0.549450517F, 0.570402145F, 0.483891934F, 0.489438981F, 0.420328945F,
    0.527323544F, 0.499775708F, 0.563579798F, 0.365729958F, 0.457300365F,
    0.4120951F, 0.276322633F, 0.224992216F, 0.269467652F, 0.251696944F,
    0.52032727F, 0.540179491F, 0.468119591F, 0.381898671F, 0.448754162F,
    0.592588365F, 0.596721709F, 0.513365567F, 0.416449845F, 0.355909109F,
    0.301120877F, 0.280432761F, 0.251983583F, 0.297144741F, 0.272250503F,
    0.386949211F, 0.373972684F, 0.43894273F, 0.395798147F, 0.381449938F,
    0.446056545F, 0.532245696F, 0.556623578F, 0.373269796F, 0.365234375F,
    0.343798399F, 0.283956021F, 0.323437214F, 0.424576938F, 0.186707601F,
    0.365275711F, 0.373276085F, 0.494112223F, 0.412538648F, 0.395666063F,
    0.543828428F, 0.637220263F, 0.513437271F, 0.295566142F, 0.288694471F,
    0.288063824F, 0.314125776F, 0.31071955F, 0.291991085F, 0.233279154F,
    0.206997052F, 0.347513348F, 0.389947951F, 0.433072537F, 0.397342473F,
    0.483831465F, 0.433392882F, 0.572284758F, 0.370228082F, 0.284811825F,
    0.257617295F, 0.279227287F, 0.313644826F, 0.194274068F, 0.204277918F,
    0.129688561F, 0.253663272F, 0.301012307F, 0.367694497F, 0.50001055F,
    0.409290612F, 0.476956487F, 0.433650702F, 0.36450851F, 0.39153862F,
    0.285964966F, 0.298499286F, 0.176351815F, 0.16564548F, 0.232204899F,
    0.163343355F, 0.133437037F, 0.291540593F, 0.275033683F, 0.370865911F,
    0.327630699F, 0.390734941F, 0.338948488F, 0.235007823F, 0.284217983F,
    0.248744413F, 0.276674628F, 0.209023118F, 0.161857218F, 0.193805918F,
    0.216462359F, 0.196447313F, 0.339607298F, 0.319902271F, 0.339010924F,
    0.348400027F, 0.397019863F, 0.353468448F, 0.333834171F, 0.344706118F,
    0.309069395F, 0.260920286F, 0.256127506F, 0.317071676F, 0.275559783F,
    0.320468456F, 0.295318365F, 0.418643773F, 0.439261317F, 0.316642F,
    0.575823545F, 0.709274411F, 0.547086537F, 0.384298414F, 0.396658212F,
    0.351650536F, 0.268305629F, 0.247956619F, 0.277561F, 0.274962485F,
    0.144214272F, 0.414336503F, 0.301229149F, 0.368764311F, 0.43534115F,
    0.487668186F, 0.468302906F, 0.444998533F, 0.342931777F, 0.269356251F,
    0.371916741F, 0.257222444F, 0.250083804F, 0.347090572F, 0.200415403F,
    0.16966702F, 0.241943553F, 0.340611845F, 0.347249746F, 0.427904F,
    0.395736873F, 0.407096475F, 0.423872F, 0.363770872F, 0.296470702F,
    0.231298938F, 0.303755879F, 0.293413043F, 0.23690547F, 0.198530301F,
    0.176561341F, 0.148030296F, 0.261825979F, 0.478165835F, 0.446911573F,
    0.362981588F, 0.40410462F, 0.308432132F, 0.275767356F, 0.283072382F,
    0.270397276F, 0.22218439F, 0.16834715F, 0.269348979F, 0.175278395F,
    0.192545384F, 0.207638949F, 0.310746789F, 0.367872596F, 0.399481535F,
    0.4092924F, 0.337332517F, 0.356886983F, 0.346621841F, 0.311468452F,
    0.199051797F, 0.326999068F, 0.196032062F, 0.302930474F, 0.162185386F,
    0.180154264F, 0.243378967F, 0.267519951F, 0.369464308F, 0.394570202F,
    0.401050299F, 0.316907614F, 0.354975402F, 0.258173466F, 0.30393365F,
    0.242408946F, 0.256900579F, 0.178394288F, 0.208903179F, 0.170874327F,
    0.148054883F, 0.0726103485F, 0.315193415F, 0.453601927F, 0.309537202F,
    0.406540722F, 0.305639148F, 0.25627771F, 0.292589605F, 0.248427987F,
    0.282059938F, 0.180916026F, 0.207204074F, 0.190679014F, 0.189537704F,
    0.193166375F, 0.158482254F, 0.21107544F, 0.317971349F, 0.219160184F,
    0.297539204F, 0.223380715F, 0.264546305F, 0.298282832F, 0.185569435F,
    0.163410604F, 0.222845659F, 0.163219899F, 0.125929564F, 0.297789425F,
    0.351057261F, 0.519113481F, 0.426145F, 0.339882314F, 0.177893907F,
    0.247362852F, 0.191715762F, 0.00711771846F, 0.122739181F, 0.174886793F,
    0.0884912759F, 0.0688884556F, 0.130035192F, 0.174950272F, 0.711325705F,
    0.678913534F, 0.66311419F, 0.582433581F, 0.416949958F, 0.242021009F,
    0.104057826F, 0.142672926F, 0.130474731F, 0.0237706676F, 0.08707387F,
    0.0635194F, 0.135231197F, 0.0182901323F, 0.138617501F, 0.828288436F,
    0.751002669F, 0.74188143F, 0.605559349F, 0.496930361F, 0.241898358F,
    0.279515386F, 0.129456073F, 0.164061695F, 0.184067518F, 0.146600753F,
    0.141040251F, 0.0272455215F, 0.113271847F, 0.191743135F, 0.802925229F,
    0.820204914F, 0.734914184F, 0.628907323F, 0.512044609F, 0.444493324F,
    0.188363701F, 0.155106202F, 0.221724853F, 0.206140697F, 0.230161101F,
    0.195971385F, 0.18152386F, 0.128837168F, 0.267382085F, 0.926826537F,
    0.808491766F, 0.773511648F, 0.715178967F, 0.759951651F, 0.435520858F,
    0.285995752F, 0.163462535F, 0.209939539F, 0.304833919F, 0.294356555F,
    0.221442774F, 0.182755053F, 0.168576166F, 0.326072F, 0.87202239F, 0.823374F,
    0.747079551F, 0.757911325F, 0.625931799F, 0.546174705F, 0.366520345F,
    0.18057391F, 0.327985227F, 0.400065899F, 0.386639059F, 0.154634506F,
    0.201257691F, 0.229052886F, 0.414418697F, 0.92629993F, 0.978629F,
    0.766624391F, 0.763973832F, 0.673900485F, 0.461800694F, 0.49003312F,
    0.431599468F, 0.415062785F, 0.366845131F, 0.300759315F, 0.273702055F,
    0.242248148F, 0.231465816F, 0.320478976F };

    // Golden acceptance bounds, four floats per data set; main() selects the
    // group starting at fv1[4 * coreid] and checkRes() reads it as:
    //   [0] upper bound, [1] lower bound for sum() of the filtered image,
    //   [2] upper bound, [3] lower bound for var() of the filtered image.
    // The attribute places the table in the linker section ".heapsram".
    __attribute__ ((section(".heapsram"))) static const float fv1[16] = 
    { 53.0272179F, 53.0261574F, 0.00805905927F,
    0.00805889722F, 80.8978424F, 80.896225F, 0.00966267474F, 0.00966248102F,
    81.6599121F, 81.6582794F, 0.0151521564F, 0.0151518527F, 66.3969116F,
    66.3955841F, 0.0218249355F, 0.0218245F };

    // 5-tap filter coefficients (1, 4, 6, 4, 1)/16; mlFir() applies them in
    // both of its filtering passes.
    __attribute__ ((section(".heapsram"))) static const float fv2[5] = { 0.0625F, 0.25F, 0.375F, 0.25F, 0.0625F };

/////////////////////////////////////////////////////////
// subfunctions
/////////////////////////////////////////////////////////

/*
 * mlFir - separable 5-tap FIR filter over a 15x15 image.
 *
 * img  : 225 input samples, addressed as img[row + 15 * col].
 * filt : 225 output samples, same layout; fully overwritten.
 *
 * Pass 1 filters along the row index into a scratch buffer, pass 2 filters
 * that buffer along the column index into filt.  Both passes use the
 * coefficients in fv2 and produce an output of the same size as the input:
 * taps that would read outside the 15-sample range are simply skipped, which
 * is equivalent to zero padding at the borders.
 *
 * For every output element the tap contributions are accumulated in
 * ascending tap order.
 */
void mlFir(const float img[225], float filt[225])
{
  float work[225];   /* result of pass 1 */
  int n;
  int tap;
  int row;
  int col;

  /* Both accumulators start from zero. */
  for (n = 0; n < 225; n++) {
    work[n] = 0.0F;
    filt[n] = 0.0F;
  }

  /* Pass 1: filter along the row index (stride 1). */
  for (tap = 0; tap < 5; tap++) {
    /* Output index o reads input index o + shift; clamp o so that the
       input index stays inside 0..14. */
    const int shift = 2 - tap;
    const int first = (tap > 2) ? (tap - 2) : 0;
    const int end   = (tap < 2) ? (tap + 13) : 15;

    for (col = 0; col < 15; col++) {
      const float *src = &img[15 * col];
      float *dst = &work[15 * col];

      for (row = first; row < end; row++) {
#ifndef FMA
        dst[row] += src[row + shift] * fv2[tap];
#else
        dst[row] = fFMA(src[row + shift], fv2[tap], dst[row]);
#endif
      }
    }
  }

  /* Pass 2: filter along the column index (stride 15). */
  for (tap = 0; tap < 5; tap++) {
    const int shift = 2 - tap;
    const int first = (tap > 2) ? (tap - 2) : 0;
    const int end   = (tap < 2) ? (tap + 13) : 15;

    for (col = first; col < end; col++) {
      const float *src = &work[15 * (col + shift)];
      float *dst = &filt[15 * col];

      for (row = 0; row < 15; row++) {
#ifndef FMA
        dst[row] += src[row] * fv2[tap];
#else
        dst[row] = fFMA(src[row], fv2[tap], dst[row]);
#endif
      }
    }
  }
}

/*
 * sum - add up all 225 elements of x.
 *
 * The elements are accumulated in single precision, in index order starting
 * from x[0].  Returns the total.
 */
float sum(const float x[225])
{
  float total = x[0];
  int idx;

  for (idx = 1; idx < 225; idx++) {
    total += x[idx];
  }

  return total;
}

/*
 * var - variance of the 225 elements of x, normalised by N - 1 = 224.
 *
 * First pass: accumulate the elements in index order and divide by 225 to
 * get the mean.  Second pass: accumulate the squared deviations from that
 * mean in index order and divide by 224.
 *
 * Both divisions go through the external helper fDiv(a, b).
 * NOTE(review): presumably fDiv returns a / b -- confirm in math_fns.h.
 */
float var(const float x[225])
{
  float mean;
  float dev;
  float acc;
  int idx;

  /* Mean of the samples. */
  mean = x[0];
  for (idx = 1; idx < 225; idx++) {
    mean += x[idx];
  }
  mean = fDiv(mean,225.0F);

  /* Sum of squared deviations from the mean. */
  dev = x[0] - mean;
  acc = dev * dev;
  for (idx = 1; idx < 225; idx++) {
    dev = x[idx] - mean;
    acc += dev * dev;
  }

  return fDiv(acc,224.0F);
}


/*
 * checkRes - test two computed values against golden bounds.
 *
 * check  : the two values to test.
 * golden : for value k, golden[2 * k] is the inclusive upper bound and
 *          golden[2 * k + 1] is the inclusive lower bound.
 *
 * Returns true only if both values lie inside their bounds.  A NaN value
 * fails, because both comparisons are then false.
 */
boolean_T checkRes(const float check[2], const float golden[4])
{
  boolean_T pass = true;
  int k;

  /* Stop as soon as one value is out of range; the result cannot recover. */
  for (k = 0; pass && (k < 2); k++) {
    const float upper = golden[2 * k];
    const float lower = golden[2 * k + 1];

    pass = (check[k] <= upper) && (check[k] >= lower);
  }

  return pass;
}

/////////////////////////////////////////////////////////
// main testing function 
/////////////////////////////////////////////////////////
/*
 * main - run the FIR kernel on this core's data set and verify the result.
 *
 * The core id selects one of the 225-sample inputs in fv0 and the matching
 * group of four bounds in fv1; ids above 3 are reduced by 4 so that they
 * reuse the data of ids 0..3.  The kernel loop is bracketed by
 * synch_barrier() and perf_begin()/perf_end().  Afterwards sum() and var()
 * of the last output are checked against the golden bounds and the outcome
 * is reported through flagPassFail() under the unmodified core id.
 *
 * Returns 0 when the check passes and 1 otherwise.
 */
int main(int argc, const char * const argv[])
{
  (void)argc;
  (void)argv;

  float filtered[225];   /* kernel output */
  float stats[2];        /* { sum, variance } of the output */
  boolean_T ok;
  int it;
  int dataset;

  /////////////////////////////////////////////////////////
  // select the data set for this core
  /////////////////////////////////////////////////////////

  dataset = get_core_id();

  printf("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,dataset);

  if (dataset > 3) {
    dataset -= 4;
  }

  /////////////////////////////////////////////////////////
  // measured kernel loop
  /////////////////////////////////////////////////////////

  synch_barrier();

  perf_begin();

  for (it = 0; it < getKernelIts(); it++) {
    // matlab kernel
    mlFir(&fv0[225 * dataset], filtered);
  }

  synch_barrier();

  perf_end();

  /////////////////////////////////////////////////////////
  // check results
  /////////////////////////////////////////////////////////

  synch_barrier();

  stats[0] = sum(filtered);
  stats[1] = var(filtered);
  ok       = checkRes(stats, &fv1[4 * dataset]);

  flagPassFail(ok, get_core_id());

  /////////////////////////////////////////////////////////
  // exit status: 0 on pass, 1 on fail
  /////////////////////////////////////////////////////////

  return !ok;
}

